from spider import Spider
'''
url = 'https://www.x23us.com/html/66/66656/'

book_author_regex = '<meta name="og:novel:author" content="(.*?)"/> '

book_name_regex = '<meta name="og:novel:book_name" content="(.*?)"/>'

book_chapter_regex = '<td class="L"><a href="(.*?).html">(.*?)</a></td>'

x = Spider(url).get_info(book_author = book_author_regex,
                         book_name = book_name_regex,
                         book_chapter=book_chapter_regex,
                         )

'''


class BookInfoApi(Spider):
    """Scrape a novel's index page for its author, title and chapter list.

    The extraction itself is delegated to ``get_info``, inherited from
    ``Spider``; this class only supplies the patterns.
    """

    def book_info(self):
        """Return the author, title and chapter list scraped from the page.

        Returns:
            The value produced by ``get_info`` for the three patterns below.
            Per the sample output recorded in this module it is a dict with
            the keys ``book_author`` and ``book_name`` (lists of strings) and
            ``book_chapter`` (list of ``(chapter_id, chapter_title)`` tuples).
        """
        # NOTE(review): the trailing space is part of the author pattern;
        # it is kept as-is because the recorded sample output shows a match.
        self.book_author_regex = '<meta name="og:novel:author" content="(.*?)"/> '

        self.book_name_regex = '<meta name="og:novel:book_name" content="(.*?)"/>'

        # Two groups: the chapter file name (without ".html") and its title.
        self.book_chapter_regex = '<td class="L"><a href="(.*?).html">(.*?)</a></td>'

        # Keep the result under a private name. Assigning it to
        # ``self.book_info`` would replace this method on the instance and
        # make a second call fail with "TypeError: 'dict' object is not
        # callable".
        self._book_info = self.get_info(
            book_author=self.book_author_regex,
            book_name=self.book_name_regex,
            book_chapter=self.book_chapter_regex,
        )

        return self._book_info

class ChapterInfo(Spider):
    """Scrape a single chapter page for its title, body and next-page link.

    The extraction itself is delegated to ``get_info``, inherited from
    ``Spider``; this class only supplies the patterns.
    """

    def content_info(self):
        """Return the title, content and next-page link of the chapter.

        Returns:
            The value produced by ``get_info`` for the three patterns below,
            also stored on ``self.chapter_info``. Per the sample output
            recorded in this module it is a dict with the keys
            ``chapter_name``, ``chapter_content`` and ``chapter_next``, each
            mapping to a list of matches.
        """
        self.chapter_name_regex = '<h1>(.*?)</h1>'
        self.chapter_content_regex = '<dd id="contents">(.*?)</dd>'
        # Capture the href of the "下一页" (next page) link. The previous
        # pattern was a literal copy of one chapter's navigation bar, with
        # that book's URLs baked in and no capture group, so it could not
        # match any other page.
        # NOTE(review): assumes get_info returns the captured group, as it
        # appears to do for the other patterns - confirm against Spider.
        self.chapter_next_regex = '<a href="([^"]*)">下一页</a>'

        self.chapter_info = self.get_info(
            chapter_name=self.chapter_name_regex,
            chapter_content=self.chapter_content_regex,
            chapter_next=self.chapter_next_regex,
        )
        return self.chapter_info

def save(book_url='https://www.x23us.com/html/4/4779/', max_chapters=10):
    """Download the first chapters of a book into a single text file.

    The output file is created in the current working directory and named
    after the book title and author scraped from ``book_url``.

    Args:
        book_url: URL of the book's index page. Chapter URLs are built by
            appending the chapter file name to it, so it should end with
            a slash.
        max_chapters: Maximum number of chapters to download, counted from
            the start of the chapter list. ``None`` downloads all of them.
            Defaults to 10, the limit that used to be hard-coded.

    Raises:
        IndexError: If the index page yields no book name or author.
    """
    book_info = BookInfoApi(book_url).book_info()

    file_name = '书名：{}-作者：{}.txt'.format(
        book_info['book_name'][0],
        book_info['book_author'][0],
    )

    # Explicit UTF-8: the scraped text is Chinese, and the platform default
    # encoding (e.g. cp1252 on Windows) cannot represent it.
    with open(file_name, 'w', encoding='utf-8') as f:

        for chapter_file, _title in book_info['book_chapter'][:max_chapters]:

            chapter_url = book_url + chapter_file + '.html'

            chapter_info = ChapterInfo(chapter_url).content_info()
            names = chapter_info['chapter_name']
            contents = chapter_info['chapter_content']

            # A page whose markup did not match yields empty lists; skip it
            # instead of aborting the whole download with an IndexError.
            if not names or not contents:
                print('skipped (no title/content found): {}'.format(chapter_url))
                continue

            print(names[0])

            # Turn the HTML spacing and line breaks into plain text.
            text = contents[0].replace('&nbsp;', ' ').replace('<br />', '\n')

            f.write(names[0])
            f.write('\n\n')
            f.write(text)
            f.write('\n\n')
            f.write('*' * 20)
            f.write('\n\n')

    
    

if __name__ == '__main__':
    # Sample of the data returned by the scrapers, kept for reference.
    #
    # book_info:
    # {'book_author': ['塞林格'],
    # 'book_name': ['麦田里的守望者'],
    # 'book_chapter': [('1524372', '·内容提要·'), ('1524375', '·作品赏析·'), ('1524378', '第01节'), ('1524381', '第02节'), ('1524384', '第03节'), ('1524387', '第04节'), ('1524390', '第05节'), ('1524394', '第06节'), ('1524397', '第07节'), ('1524400', '第08节'), ('1524403', '第09节'), ('1524406', '第10节'), ('1524410', '第11节'), ('1524413', '第12节'), ('1524416', '第13节'), ('1524419', '第14节'), ('1524422', '第15节'), ('1524425', '第16节'), ('1524428', '第17节'), ('1524431', '第18节'), ('1524434', '第19节'), ('1524437', '第20节'), ('1524441', '第21节'), ('1524444', '第22节'), ('1524447', '第23节'), ('1524450', '第24节'), ('1524453', '第25节'), ('1524456', '第26节')]}
    #
    # chapter_info:
    # {'chapter_name': ['正文 ·内容提要·'],
    # 'chapter_content': ['&n...],
    # 'chapter_next': []
    # }

    # Run the downloader with its default book URL.
    save()
    
    


        






























        

    

    

